version 16.0
clear all
set more off
set seed 123456

*------------------------------------------------------------------------------
* PSM-DID master script
* - Analysis 1: Impact of selling any crop (cci>0)
* - Analysis 2: Impact of selling cash crops among sellers
*
* For each country panel the script matches units at the baseline wave with
* psmatch2, then estimates a two-way fixed-effects DID (xtreg, fe) and a
* staggered DID (csdid) on the matched panel.
*------------------------------------------------------------------------------

* Install requirements: a package is fetched from SSC only when -which-
* cannot find it. csdid relies on the drdid package for its estimators, so
* drdid is checked as well; without it csdid stops at its first call.
foreach pkg in psmatch2 drdid csdid {
    capture which `pkg'
    if _rc ssc install `pkg', replace
}

*------------------------------
* Globals: data folder, country panels, covariate lists
*------------------------------

* Folder that holds one "<country>.dta" file per entry of $countries
global data_path "C:/Users/andre/OneDrive/Desktop/BMGF/LSMS ISA/MAP/Data Fred/Panel"
global countries "MalPanel-5 TanPanel-5 UgaPanel-6 NigPanel-6 EthPanel-5"

*log using "$data_path/PSM-DID STATA output",replace

* Propensity-score covariates: covariates1 is used by Analysis 1;
* covariates2 (Analysis 2) is the same list with cci placed in front.
global covariates1 "lognf educh head_age head_sex dependency_ratio farmsize asset fadult madult educated infra_index vharvest"
global covariates2 "cci ${covariates1}"

* Controls entering the xtreg and csdid outcome models
global reg_covariates "lognf educh head_age head_sex dependency_ratio vharvest farmsize asset"

foreach country of global countries {

    di as result _dup(70) "="
    di as result "          NOW PROCESSING: `country'"
    di as result _dup(70) "="

    * Load the country panel and keep rows with a positive, non-missing fadult
    use "$data_path/`country'.dta", clear
    keep if fadult > 0 & fadult != .

    * Rebuild the outcome dummy from aweai3: 1 when aweai3 >= 0.94, 0 when it
    * is below. The emp variable shipped with the data is kept as empold.
    * Rows with a missing aweai3 stay missing instead of being coded 0, so
    * that a missing score is not counted as "not empowered" in the outcome.
    * NOTE(review): if aweai3 is stored as float, a value recorded as exactly
    * 0.94 compares below the double literal 0.94; use float(0.94) if that
    * case matters - confirm the storage type.
    ren emp empold
    gen byte emp = (aweai3 >= 0.94) if !missing(aweai3)

    * skip wave 1 if AWEAI not asked in first wave
    if inlist("`country'", "EthPanel-5", "TanPanel-5", "NigPanel-6") {
        quietly summarize t, meanonly
        drop if t==r(min)
    }

    xtset id t

    * Log of (Netnfcashinc + 1); any lognf already in the file is replaced.
    * ln() returns missing whenever Netnfcashinc <= -1.
    capture drop lognf
    generate double lognf = ln(Netnfcashinc + 1)

    * Baseline = earliest wave left in the data (one constant for all rows)
    egen baseline_year = min(t)


    *==========================================================================
    * ANALYSIS 1: any crop selling (cci>0)
    *==========================================================================
    display as result _dup(70) "-"
    display as result "Analysis 1: Impact of selling any crop (CCI)"
    display as result _dup(70) "-"

    * Per-id flags: sold in any wave, and sold in the baseline wave.
    * A row counts as a sale when cci is positive and not missing.
    bysort id (t): egen byte ever_seller = max(cci>0 & !missing(cci))
    generate byte sold_at_baseline = (t==baseline_year & cci>0 & !missing(cci))
    by id: egen byte was_seller_at_baseline = max(sold_at_baseline)

    * gvar = first wave in which the id sells; missing when it never sells
    generate double first_sale_year = cond(cci>0 & !missing(cci), t, .)
    by id (t): egen double gvar = min(first_sale_year)

    * 0 = never sells, 1 = first sale after baseline, missing = everything
    * else (e.g. already selling at baseline).
    generate byte treatment_group = cond(ever_seller==0, 0, ///
        cond(was_seller_at_baseline==0 & gvar>baseline_year & !missing(gvar), 1, .))

    * ---- Baseline-only matching with psmatch2
    * The full panel is preserved; matching runs on the baseline cross-section
    * of treated (1) and never-seller (0) units only.
    preserve

    keep if t==baseline_year & inlist(treatment_group,0,1)

    * PSM (1-NN, with replacement; common support; logit PS)
    psmatch2 treatment_group ${covariates1}, outcome(emp) neighbor(1) ties logit common

    * Keep treated + matched controls inside common support.
    * psmatch2 leaves _weight missing for controls that are never used as a
    * match, and Stata ranks missing above every number, so "_weight>0" on
    * its own is true for those rows and would keep every unmatched control.
    * Missing weights are therefore excluded explicitly.
    keep if _support==1 & _weight>0 & !missing(_weight)
    keep id
    duplicates drop
    tempfile A1_matched_ids
    save `A1_matched_ids', replace

    * Bring the full panel back while keeping the preserved copy, so the
    * later plain -restore- can undo the matched-sample restriction.
    restore, preserve

    * Restrict the full panel to the ids selected by the matching step

    merge m:1 id using `A1_matched_ids', keep(match) nogenerate

    * Diagnostic: rows without a first-sale wave (never-treated);
    * a count of 0 means no never-treated units are left
    count if gvar>=.

    * Latest first-treatment wave and latest wave in the matched panel
    summarize gvar if !missing(gvar), meanonly
    local gmax = r(max)
    summarize t, meanonly
    local tmax = r(max)

    * Diagnostic: rows in the last wave that are not-yet-treated or never-treated
    count if t==`tmax' & (missing(gvar) | gvar>`tmax')


    xtset id t

    * TWFE DID: post is 1 for treated ids from their first-sale wave onward,
    * 0 for treated ids before it and for all control rows
    generate byte post = (treatment_group==1 & t>=gvar) if inlist(treatment_group,0,1)
    generate byte treat_post = treatment_group*post
    display as result "DID (TWFE) on CCI matched panel"
    xtreg emp treat_post i.t ${reg_covariates}, fe vce(cluster id)

    * Staggered DID (csdid): estimated for every panel except EthPanel-5
    if ("`country'" != "EthPanel-5") {
        display as result "csdid for CCI (clustered)"
        csdid emp ${reg_covariates}, ivar(id) time(t) gvar(gvar) never vce(cluster id) method(ipw)
        * Aggregations of the group-time effects
        estat simple
        estat group
        estat calendar
        estat event
        estat all
    }
    else {
        display as text "Skipping csdid for `country' as requested."
    }

    * Back to the full panel that was preserved before matching
    restore

    *==========================================================================
    * ANALYSIS 2: cash crop sales among sellers
    *==========================================================================
    display as result _dup(70) "-"
    display as result "Analysis 2: Impact of selling CASH CROPS (cci>0 sample)"
    display as result _dup(70) "-"

    * NOTE(review): the banner says "cci>0 sample", but nothing in this
    * section filters on cci or ever_seller - confirm whether a sellers-only
    * restriction is intended here.
    preserve

    * A missing cash_sale is treated as "no cash-crop sale"; the edit lives
    * only inside this preserve/restore pair.
    replace cash_sale = 0 if missing(cash_sale)
    by id: egen byte ever_cash_seller = max(cash_sale>0)

    generate byte sold_cash_at_baseline = (t==baseline_year & cash_sale>0)
    by id: egen byte was_cash_seller_at_baseline = max(sold_cash_at_baseline)

    * gvar_cash = first wave with a cash-crop sale; missing when there is none
    generate double first_cash_sale_year = cond(cash_sale>0, t, .)
    by id (t): egen double gvar_cash = min(first_cash_sale_year)

    * 0 = never sells cash crops, 1 = first cash sale after baseline,
    * missing = everything else; rows with a missing group are dropped.
    generate byte treatment_group_cash = cond(ever_cash_seller==0, 0, ///
        cond(was_cash_seller_at_baseline==0 & gvar_cash>baseline_year & !missing(gvar_cash), 1, .))

    keep if !missing(treatment_group_cash)

    tempfile A2_cash_panel
    save `A2_cash_panel', replace

    * Baseline-only PSM among eligible T/C
    * (1-NN with replacement, ties kept, logit score, common support)
    keep if t==baseline_year
    psmatch2 treatment_group_cash ${covariates2}, outcome(emp) neighbor(1) ties logit common

    * Keep treated + matched controls inside common support.
    * psmatch2 leaves _weight missing for controls that are never used as a
    * match, and Stata ranks missing above every number, so "_weight>0" on
    * its own is true for those rows and would keep every unmatched control.
    * Missing weights are therefore excluded explicitly.
    keep if _support==1 & _weight>0 & !missing(_weight)
    keep id
    duplicates drop
    tempfile A2_matched_ids
    save `A2_matched_ids', replace

    * Reload the eligible panel and keep only the matched ids
    use `A2_cash_panel', clear

    merge m:1 id using `A2_matched_ids', keep(match) nogen

    xtset id t

    * The diagnostics below look at gvar_cash, the first cash-sale wave that
    * defines treatment in this analysis. Analysis 1's gvar (first wave with
    * any sale) is still in the data, but it describes a different treatment.

    * Do we truly have never-treated in the estimation sample?
    count if missing(gvar_cash)           // if 0, there are no never-treated left

    * What is the last treatment time and last calendar time?
    summ gvar_cash if gvar_cash<., meanonly
    local gmax = r(max)
    summ t, meanonly
    local tmax = r(max)

    * Are there controls available at the last period for the last cohort?
    count if t==`tmax' & (gvar_cash>`tmax' | missing(gvar_cash))   // not-yet-treated or never-treated at tmax


    * TWFE DID (Cash crop selling): post_cash is 1 for treated ids from their
    * first cash-sale wave onward and 0 otherwise

    gen byte post_cash = (t>=gvar_cash) if treatment_group_cash==1
    replace post_cash = 0 if treatment_group_cash==0
    gen byte treat_post_cash = treatment_group_cash*post_cash
    di as result "DID (TWFE) on cash sale matched panel"
    xtreg emp treat_post_cash i.t ${reg_covariates}, fe vce(cluster id)
	
	
* Staggered DID on the cash-sale matched panel: every panel except EthPanel-5
if ("`country'" != "EthPanel-5") {
    display as result "csdid for CASH SALE (clustered) on matched panel"
    csdid emp ${reg_covariates}, ivar(id) time(t) gvar(gvar_cash) never method(ipw) vce(cluster id)
    * Aggregations of the group-time effects
    estat simple
    estat group
    estat calendar
    estat event
    estat all
}
else {
    display as text "Skipping csdid for `country' (cash sale) as requested."
}

    * (Optional) TWFE check - kept for reference only. The same four commands
    * already run, uncommented, in the TWFE step of Analysis 2 above.
    * gen byte post_cash = (t>=gvar_cash) if treatment_group_cash==1
    * replace post_cash = 0 if treatment_group_cash==0
    * gen byte treat_post_cash = treatment_group_cash*post_cash
    * xtreg emp treat_post_cash i.t ${reg_covariates}, fe vce(cluster id)

    *==========================================================================
    * (Template) ANALYSIS 3: production of cash crop among sellers
    *==========================================================================
    * Only the message below is executed; the template itself is commented out.
    di as text "(Template) Analysis 3: producing cash crop among sellers — plug in your production indicator (e.g., cc) to run."

    /*
    Inactive template. Notes for whoever enables it:
      - ever_seller is already created in Analysis 1 and is part of the
        preserved data, so the egen line that creates it again will stop with
        "already defined" unless the variable is dropped or the line removed.
      - the matched-sample filter excludes missing _weight, because a missing
        value compares greater than 0 in Stata.

    restore, preserve
    by id: egen byte ever_seller = max(cci>0 & cci<.)
    keep if ever_seller==1

    replace cc = 0 if missing(cc)   // your production indicator
    by id: egen byte ever_cc = max(cc>0)
    gen byte cc_at_baseline = (cc>0 & t==baseline_year)
    by id: egen byte was_cc_at_baseline = max(cc_at_baseline)

    gen double first_cc_year = t if cc>0
    by id (t): egen double gvar_cc = min(first_cc_year)

    gen byte treatment_group_cc = .
    replace treatment_group_cc = 0 if ever_cc==0
    replace treatment_group_cc = 1 if was_cc_at_baseline==0 & gvar_cc>baseline_year & gvar_cc<.

    drop if missing(treatment_group_cc)

    tempfile A3_cc_panel
    save `A3_cc_panel', replace

    keep if t==baseline_year
    psmatch2 treatment_group_cc ${covariates2}, outcome(emp) neighbor(1) ties logit common

    keep if _support==1 & _weight>0 & _weight<.
    keep id
    duplicates drop
    tempfile A3_matched_ids
    save `A3_matched_ids', replace

    use `A3_cc_panel', clear
    merge m:1 id using `A3_matched_ids', keep(match) nogen

    xtset id t

    csdid emp ${reg_covariates}, ivar(id) time(t) gvar(gvar_cc) notyet
    estat event, window(-4 4)
    estat pretrend, window(-4 -1)
    */
	
* Undo the Analysis 2 sample changes: back to the data as it was at the
* -preserve- that opens Analysis 2
restore

} // end foreach

di as result _dup(70) "="
di as result "          ALL ANALYSES COMPLETE"
di as result _dup(70) "="


* Note: Ghana not panel; Ethiopia has only two waves if 2012 is dropped, so not suitable for staggered DID analysis.

* The "log using" line near the top of the script is commented out, so no
* log is open when execution gets here. A bare -log close- would then stop
* the do-file with a "no log file open" error and nothing after this line
* would run. -capture- closes the log when there is one and is a no-op
* otherwise.
capture log close








version 16.0
clear all
set more off
set seed 123456

*------------------------------------------------------------------------------
* PSM-DID master script
* - Analysis 1: Impact of selling any crop (cci>0)
* - Analysis 2: Impact of selling cash crops among sellers
*
* NOTE(review): this is a second full copy of the script in the same file.
* It differs from the first copy in that it uses the emp variable as shipped
* with the data and defines the Analysis 2 treatment from trader - confirm
* that both copies are meant to run back to back.
*------------------------------------------------------------------------------

* Install requirements: a package is fetched from SSC only when -which-
* cannot find it. csdid relies on the drdid package for its estimators, so
* drdid is checked as well; without it csdid stops at its first call.
foreach pkg in psmatch2 drdid csdid {
    capture which `pkg'
    if _rc ssc install `pkg', replace
}

*------------------------------
* Globals: data folder, country panels, covariate lists
*------------------------------

* Folder that holds one "<country>.dta" file per entry of $countries
global data_path "C:/Users/andre/OneDrive/Desktop/BMGF/LSMS ISA/MAP/Data Fred/Panel"
global countries "MalPanel-5 TanPanel-5 UgaPanel-6 NigPanel-6 EthPanel-5"

*log using "$data_path/PSM-DID STATA output",replace

* Propensity-score covariates: covariates1 is used by Analysis 1;
* covariates2 (Analysis 2) is the same list with cci placed in front.
global covariates1 "lognf educh head_age head_sex dependency_ratio farmsize asset fadult madult educated infra_index vharvest"
global covariates2 "cci ${covariates1}"

* Controls entering the xtreg and csdid outcome models
global reg_covariates "lognf educh head_age head_sex dependency_ratio vharvest farmsize asset"

foreach country of global countries {

    display as result _dup(70) "="
    display as result "          NOW PROCESSING: `country'"
    display as result _dup(70) "="

    * Load the panel file of the current country
    use "$data_path/`country'.dta", clear

    * skip wave 1 if AWEAI not asked in first wave
    if inlist("`country'", "EthPanel-5", "TanPanel-5", "NigPanel-6") {
        quietly summarize t, meanonly
        keep if t!=r(min)
    }

    xtset id t

    * Log of (1 + Netnfcashinc); any lognf already in the file is replaced
    capture drop lognf
    generate double lognf = ln(1 + Netnfcashinc)

    * Baseline = earliest wave left in the data (one constant for all rows)
    egen baseline_year = min(t)


    *==========================================================================
    * ANALYSIS 1: any crop selling (cci>0)
    *==========================================================================
    display as result _dup(70) "-"
    display as result "Analysis 1: Impact of selling any crop (CCI)"
    display as result _dup(70) "-"

    * Per-id flags: sold in any wave, and sold in the baseline wave.
    * A row counts as a sale when cci is positive and not missing.
    bysort id (t): egen byte ever_seller = max(cci>0 & !missing(cci))
    generate byte sold_at_baseline = (t==baseline_year & cci>0 & !missing(cci))
    by id: egen byte was_seller_at_baseline = max(sold_at_baseline)

    * gvar = first wave in which the id sells; missing when it never sells
    generate double first_sale_year = cond(cci>0 & !missing(cci), t, .)
    by id (t): egen double gvar = min(first_sale_year)

    * 0 = never sells, 1 = first sale after baseline, missing = everything
    * else (e.g. already selling at baseline).
    generate byte treatment_group = cond(ever_seller==0, 0, ///
        cond(was_seller_at_baseline==0 & gvar>baseline_year & !missing(gvar), 1, .))

    * ---- Baseline-only matching with psmatch2
    * The full panel is preserved; matching runs on the baseline cross-section
    * of treated (1) and never-seller (0) units only.
    preserve

    keep if t==baseline_year & inlist(treatment_group,0,1)

    * PSM (1-NN, with replacement; common support; logit PS)
    psmatch2 treatment_group ${covariates1}, outcome(emp) neighbor(1) ties logit common

    * Keep treated + matched controls inside common support.
    * psmatch2 leaves _weight missing for controls that are never used as a
    * match, and Stata ranks missing above every number, so "_weight>0" on
    * its own is true for those rows and would keep every unmatched control.
    * Missing weights are therefore excluded explicitly.
    keep if _support==1 & _weight>0 & !missing(_weight)
    keep id
    duplicates drop
    tempfile A1_matched_ids
    save `A1_matched_ids', replace

    * Bring the full panel back while keeping the preserved copy, so the
    * later plain -restore- can undo the matched-sample restriction.
    restore, preserve

    * Restrict the full panel to the ids selected by the matching step

    merge m:1 id using `A1_matched_ids', keep(match) nogenerate

    * Diagnostic: rows without a first-sale wave (never-treated);
    * a count of 0 means no never-treated units are left
    count if gvar>=.

    * Latest first-treatment wave and latest wave in the matched panel
    summarize gvar if !missing(gvar), meanonly
    local gmax = r(max)
    summarize t, meanonly
    local tmax = r(max)

    * Diagnostic: rows in the last wave that are not-yet-treated or never-treated
    count if t==`tmax' & (missing(gvar) | gvar>`tmax')


    xtset id t

    * TWFE DID: post is 1 for treated ids from their first-sale wave onward,
    * 0 for treated ids before it and for all control rows
    generate byte post = (treatment_group==1 & t>=gvar) if inlist(treatment_group,0,1)
    generate byte treat_post = treatment_group*post
    display as result "DID (TWFE) on CCI matched panel"
    xtreg emp treat_post i.t ${reg_covariates}, fe vce(cluster id)

    * Staggered DID (csdid): estimated for every panel except EthPanel-5
    if ("`country'" != "EthPanel-5") {
        display as result "csdid for CCI (clustered)"
        csdid emp ${reg_covariates}, ivar(id) time(t) gvar(gvar) never vce(cluster id) method(ipw)
        * Aggregations of the group-time effects
        estat simple
        estat group
        estat calendar
        estat event
        estat all
    }
    else {
        display as text "Skipping csdid for `country' as requested."
    }

    * Back to the full panel that was preserved before matching
    restore

    *==========================================================================
    * ANALYSIS 2: cash crop sales among sellers
    *==========================================================================
    display as result _dup(70) "-"
    display as result "Analysis 2: Impact of selling CASH CROPS (cci>0 sample)"
    display as result _dup(70) "-"

    * NOTE(review): the banner says "cci>0 sample", but nothing in this
    * section filters on cci or ever_seller - confirm whether a sellers-only
    * restriction is intended here.
    preserve

    * In this copy the treatment indicator is trader. A missing trader is
    * treated as 0; the edit lives only inside this preserve/restore pair.
    replace trader = 0 if missing(trader)
    by id: egen byte ever_cash_seller = max(trader>0)

    generate byte sold_cash_at_baseline = (t==baseline_year & trader>0)
    by id: egen byte was_cash_seller_at_baseline = max(sold_cash_at_baseline)

    * gvar_cash = first wave with trader>0; missing when there is none
    generate double first_cash_sale_year = cond(trader>0, t, .)
    by id (t): egen double gvar_cash = min(first_cash_sale_year)

    * 0 = never treated, 1 = first treated after baseline,
    * missing = everything else; rows with a missing group are dropped.
    generate byte treatment_group_cash = cond(ever_cash_seller==0, 0, ///
        cond(was_cash_seller_at_baseline==0 & gvar_cash>baseline_year & !missing(gvar_cash), 1, .))

    keep if !missing(treatment_group_cash)

    tempfile A2_cash_panel
    save `A2_cash_panel', replace

    * Baseline-only PSM among eligible T/C
    * (1-NN with replacement, ties kept, logit score, common support)
    keep if t==baseline_year
    psmatch2 treatment_group_cash ${covariates2}, outcome(emp) neighbor(1) ties logit common

    * Keep treated + matched controls inside common support.
    * psmatch2 leaves _weight missing for controls that are never used as a
    * match, and Stata ranks missing above every number, so "_weight>0" on
    * its own is true for those rows and would keep every unmatched control.
    * Missing weights are therefore excluded explicitly.
    keep if _support==1 & _weight>0 & !missing(_weight)
    keep id
    duplicates drop
    tempfile A2_matched_ids
    save `A2_matched_ids', replace

    * Reload the eligible panel and keep only the matched ids
    use `A2_cash_panel', clear

    merge m:1 id using `A2_matched_ids', keep(match) nogen

    xtset id t

    * The diagnostics below look at gvar_cash, the first treated wave that
    * defines treatment in this analysis. Analysis 1's gvar (first wave with
    * any sale) is still in the data, but it describes a different treatment.

    * Do we truly have never-treated in the estimation sample?
    count if missing(gvar_cash)           // if 0, there are no never-treated left

    * What is the last treatment time and last calendar time?
    summ gvar_cash if gvar_cash<., meanonly
    local gmax = r(max)
    summ t, meanonly
    local tmax = r(max)

    * Are there controls available at the last period for the last cohort?
    count if t==`tmax' & (gvar_cash>`tmax' | missing(gvar_cash))   // not-yet-treated or never-treated at tmax


    * TWFE DID (Cash crop selling): post_cash is 1 for treated ids from their
    * first treated wave onward and 0 otherwise

    gen byte post_cash = (t>=gvar_cash) if treatment_group_cash==1
    replace post_cash = 0 if treatment_group_cash==0
    gen byte treat_post_cash = treatment_group_cash*post_cash
    di as result "DID (TWFE) on cash sale matched panel"
    xtreg emp treat_post_cash i.t ${reg_covariates}, fe vce(cluster id)
	
	
* Staggered DID on the cash-sale matched panel: every panel except EthPanel-5
if ("`country'" != "EthPanel-5") {
    display as result "csdid for CASH SALE (clustered) on matched panel"
    csdid emp ${reg_covariates}, ivar(id) time(t) gvar(gvar_cash) never method(ipw) vce(cluster id)
    * Aggregations of the group-time effects
    estat simple
    estat group
    estat calendar
    estat event
    estat all
}
else {
    display as text "Skipping csdid for `country' (cash sale) as requested."
}

    * (Optional) TWFE check - kept for reference only. The same four commands
    * already run, uncommented, in the TWFE step of Analysis 2 above.
    * gen byte post_cash = (t>=gvar_cash) if treatment_group_cash==1
    * replace post_cash = 0 if treatment_group_cash==0
    * gen byte treat_post_cash = treatment_group_cash*post_cash
    * xtreg emp treat_post_cash i.t ${reg_covariates}, fe vce(cluster id)

    *==========================================================================
    * (Template) ANALYSIS 3: production of cash crop among sellers
    *==========================================================================
    * Only the message below is executed; the template itself is commented out.
    di as text "(Template) Analysis 3: producing cash crop among sellers — plug in your production indicator (e.g., cc) to run."

    /*
    Inactive template. Notes for whoever enables it:
      - ever_seller is already created in Analysis 1 and is part of the
        preserved data, so the egen line that creates it again will stop with
        "already defined" unless the variable is dropped or the line removed.
      - the matched-sample filter excludes missing _weight, because a missing
        value compares greater than 0 in Stata.

    restore, preserve
    by id: egen byte ever_seller = max(cci>0 & cci<.)
    keep if ever_seller==1

    replace cc = 0 if missing(cc)   // your production indicator
    by id: egen byte ever_cc = max(cc>0)
    gen byte cc_at_baseline = (cc>0 & t==baseline_year)
    by id: egen byte was_cc_at_baseline = max(cc_at_baseline)

    gen double first_cc_year = t if cc>0
    by id (t): egen double gvar_cc = min(first_cc_year)

    gen byte treatment_group_cc = .
    replace treatment_group_cc = 0 if ever_cc==0
    replace treatment_group_cc = 1 if was_cc_at_baseline==0 & gvar_cc>baseline_year & gvar_cc<.

    drop if missing(treatment_group_cc)

    tempfile A3_cc_panel
    save `A3_cc_panel', replace

    keep if t==baseline_year
    psmatch2 treatment_group_cc ${covariates2}, outcome(emp) neighbor(1) ties logit common

    keep if _support==1 & _weight>0 & _weight<.
    keep id
    duplicates drop
    tempfile A3_matched_ids
    save `A3_matched_ids', replace

    use `A3_cc_panel', clear
    merge m:1 id using `A3_matched_ids', keep(match) nogen

    xtset id t

    csdid emp ${reg_covariates}, ivar(id) time(t) gvar(gvar_cc) notyet
    estat event, window(-4 4)
    estat pretrend, window(-4 -1)
    */
	
* Undo the Analysis 2 sample changes: back to the data as it was at the
* -preserve- that opens Analysis 2
restore

} // end foreach

di as result _dup(70) "="
di as result "          ALL ANALYSES COMPLETE"
di as result _dup(70) "="


* Note: Ghana not panel; Ethiopia has only two waves if 2012 is dropped, so not suitable for staggered DID analysis.

* The "log using" line in the globals section of this script is commented
* out, so no log is open when execution gets here. A bare -log close- would
* then stop the do-file with a "no log file open" error. -capture- closes
* the log when there is one and is a no-op otherwise.
capture log close

